from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# pip install langchain_chroma langchain_openai
file_path = "nke-10k-2023.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

# print(len(docs))
# print(docs[0].page_content[0:100])
# print(docs[0].metadata)

llm = ChatOpenAI(
    api_key="sk-CftUbVSsA61lwwgMz9xvt6znTunQZfgBP8ZCVLbQsKfXUR6k",
    model='deepseek-ai/DeepSeek-V3',
    base_url="https://www.henapi.top/v1",
    temperature=0
)

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
splits=text_splitter.split_documents(docs)


openAIEmbeddings=OpenAIEmbeddings(api_key="sk-CftUbVSsA61lwwgMz9xvt6znTunQZfgBP8ZCVLbQsKfXUR6k",
    model='text-embedding-3-small',
    base_url="https://www.henapi.top/v1")

vectorstore=Chroma.from_documents(documents=splits,embedding=openAIEmbeddings)
retriever=vectorstore.as_retriever()

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}"),
    ]
)

question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever,question_answer_chain)
results=rag_chain.invoke({"input": "What was Nike's revenue in 2023?"})
print(results)

print(results["context"][0].page_content)

print(results["context"][0].metadata)